1. Preparation

Set working directory

# change this value if needed
setwd("/Users/gabays/github/RiseAndFall")
getwd()
## [1] "/Users/gabays/github/RiseAndFall"

Load packages

if(!require("ggplot2")){
  install.packages("ggplot2")
  library(ggplot2)
}
if(!require("roll")){
  install.packages("roll")
  library(roll)
}
if(!require("purrr")){
  install.packages("purrr")
  library(purrr)
}
if(!require("stylo")){
  install.packages("stylo")
  library(stylo)
}

Load external functions

source("./R/functions.R")

Load previously computed data

load("./R/RiseAndFall.RData")

2. Set up

2.1 As 3-grams

Get Metadata and corpus as 3-grams

#Metadata
metadata = read.csv(file="./metadata.csv", sep=",", header = TRUE, row.names = 1, stringsAsFactors=T)
metadata = metadata[sort(rownames(metadata)), ]
#data
data =  read.csv(file="./feats_tests_n3_k_5000.csv", sep=",", header = TRUE, row.names = 1, stringsAsFactors=T)
data_stop =  read.csv(file="./feats_tests_n1_k_5000.csv", sep=",", header = TRUE, row.names = 1, stringsAsFactors=T)

We compute de distribution: which 3grams are relevant?

dist = colSums(data[,3:ncol(data)])
plot(dist,type = "l")

We work with the 2000 most frequent 3grams – after that the curve is totally flat

d <- data[,3:2003]
d_stop <- data_stop[,3:length(data_stop)]

We normalise the vectors

d <- t(d)
#normalisation
d <- normalisations(d)
#stopwords
d_stop <- t(d_stop)
d_stop <- normalisations(d_stop)

We add the metadata

# we add the metadata
control<-rbind(rownames(metadata),colnames(d))
# we control that we have similar values
head(t(control))
##      [,1]                                  
## [1,] "abeille-argelie"                     
## [2,] "abeille-coriolan"                    
## [3,] "abeille-lyncee"                      
## [4,] "about-risette"                       
## [5,] "adenis-homme-qui-ne-peut-pas-siffler"
## [6,] "aigueberre-avare-amoureux"           
##      [,2]                                      
## [1,] "abeille-argelie.txt"                     
## [2,] "abeille-coriolan.txt"                    
## [3,] "abeille-lyncee.txt"                      
## [4,] "about-risette.txt"                       
## [5,] "adenis-homme-qui-ne-peut-pas-siffler.txt"
## [6,] "aigueberre-avare-amoureux.txt"
tail(t(control))
##         [,1]                                
## [1511,] "voltaire-tanis-zelide"             
## [1512,] "voltaire-zaire"                    
## [1513,] "voltaire-zulime"                   
## [1514,] "vondrebeck-alard-forces-de-l-amour"
## [1515,] "zola-madeleine"                    
## [1516,] "zola-therese-raquin"               
##         [,2]                                    
## [1511,] "voltaire-tanis-zelide.txt"             
## [1512,] "voltaire-zaire.txt"                    
## [1513,] "voltaire-zulime.txt"                   
## [1514,] "vondrebeck-alard-forces-de-l-amour.txt"
## [1515,] "zola-madeleine.txt"                    
## [1516,] "zola-therese-raquin.txt"

2.2 As plain texts (beta)

Alternative: loading plays in plain text (for later)

corpus<-list()
#Get the list of all txt files
TxtFiles <- list.files(path = "txt",pattern = "txt$")
#Loop over all files
for(x in TxtFiles){
  #Get the path
  FullPath <- paste("txt", x, sep="/")
  #Get the name (drop .txt extension)
# TextName <- sub("\\.txt", "", x)
  #Get the text in the file
#  FullText <- suppressWarnings(read.csv(FullPath, header = FALSE, sep = "\n", fileEncoding="UTF-8"))
  FullText <- suppressWarnings(readLines(FullPath))
    #Append the text to the corpus
  corpus<-append(corpus,FullText)
}
View(corpus)

We transform the loaded texts into minable data

#tokenisation
corpus.tok = lapply(corpus, txt.to.words2)
#Counting frequency of tokens
corpus.tok.list = make.frequency.list(corpus.tok)
#Transform frequency into a table
corpus.tok.list.freq=make.table.of.frequencies(corpus.tok, corpus.tok.list, relative = F)
## processing  1516  text samples
## .......................................................................................................................................................
## combining frequencies into a table...
#I name columns
row.names(corpus.tok.list.freq)=TxtFiles
#I save a copy
write.csv(corpus.tok.list.freq, file = "corpus.bench.tok.list.freq.csv",row.names=TRUE)
#Convert table into dataframe
corpus.tok.list.freq = as.data.frame(read.csv(file="corpus.bench.tok.list.freq.csv", sep = ",", header = TRUE, row.names=1, quote = '\"'))
#transposition (rows become columns)
corpus.tok.list.freq = t(corpus.tok.list.freq)
#normalisation
corpus.tok.list.freq = normalisations(corpus.tok.list.freq)
#Displaying the dataframe
View(corpus.tok.list.freq)

3 Preparation

3.1 Generic value of features

3.1.1 generic value of stopwords

We control that stopwords do identify genres

distToTragedy <- DistToCentroid(d_stop, centroid = rowMeans(d_stop[, metadata[, "Genre"] == "tragedy"]))

distToComedy <- DistToCentroid(d_stop, centroid = rowMeans(d_stop[, metadata[, "Genre"] == "comedy"]))

m <- cbind(distToTragedy, distToComedy)
colnames(m) <- c("DistTragedyCentroid","DistComedyCentroid")

#create transparent color
mycol <- t_col("white", perc = 100, name = "transparent")

#fix colors
colors <- metadata[, "Genre"]
levels(colors) = c("transparent","blue", "transparent", "transparent", "transparent", "transparent", "transparent", "transparent", "transparent", "red", "transparent", "transparent")
#plot
plot(m, col=as.character(colors))
legend(18, 38, c("comedy", "tragedy"),col=c("blue", "red"),pch=1)
grid(nx = NULL, ny = NULL,
     lty = 1,      # Grid line type
     col = "gray", # Grid line color
     lwd = 1)      # Grid line width

# text(m[, 1], m[, 2], labels=row.names(m), cex= 0.1) # To add the name of the plays

#Save the image
png("./R/images/clusters_stop.png", width = 2500, height = 2000, res=300) 
plot(m, col=as.character(colors))
legend(18, 38, c("comedy", "tragedy"),col=c("blue", "red"),pch=1)
grid(nx = NULL, ny = NULL, lty = 1, col = "gray", lwd = 1)
dev.off() 
## quartz_off_screen 
##                 2

We control tragedies classified with comedies:

literaryGenre <- metadata[, "Genre"]
check_anomalies<-as.data.frame(cbind(m,literaryGenre))
subset(check_anomalies, literaryGenre == '10' & DistTragedyCentroid >10)
##                                    DistTragedyCentroid DistComedyCentroid
## aubignac-pucelle-prose.txt                    10.13830           9.848412
## delavigne-famille-temps-luther.txt            10.05154          10.756579
## mathieu-magicienne-etrangere.txt              10.57459          11.594939
## piron-nouvelle-messaline.txt                  10.26150          10.815560
## puget-de-la-serre-pandoste-ii.txt             10.58983          10.440721
## puget-de-la-serre-thesee.txt                  10.67992          10.032731
## puget-de-la-serre-thomas-morus.txt            10.51248          10.700152
## viau-pyrame.txt                               10.18889          11.218962
##                                    literaryGenre
## aubignac-pucelle-prose.txt                    10
## delavigne-famille-temps-luther.txt            10
## mathieu-magicienne-etrangere.txt              10
## piron-nouvelle-messaline.txt                  10
## puget-de-la-serre-pandoste-ii.txt             10
## puget-de-la-serre-thesee.txt                  10
## puget-de-la-serre-thomas-morus.txt            10
## viau-pyrame.txt                               10

We control comedies classified with tragedies:

literaryGenre <- metadata[, "Genre"]
check_anomalies<-as.data.frame(cbind(m,literaryGenre))
subset(check_anomalies, literaryGenre == '2' & DistTragedyCentroid <9)
##                                        DistTragedyCentroid DistComedyCentroid
## archambault-etrennes.txt                          8.422193           8.746238
## chapuzeau-geneve-delivree.txt                     8.680547          11.143438
## cinq-auteurs-comedie-des-tuileries.txt            8.785608          11.072267
## colle-alfonse.txt                                 8.706562          11.740707
## corneillep-illusion-comique.txt                   8.577145          11.471762
## corneillep-melite.txt                             8.889448          10.986803
## corneillet-geolier-de-sois-meme.txt               7.832000          10.080357
## corneillet-illustres-ennemis.txt                  8.475145          10.324881
## cubieres-palmezeaux-lacrymanie.txt                8.509334          11.291061
## labaume-messe-de-gnide.txt                        8.163510          10.605135
## moline-legislatrices.txt                          8.173196           7.149285
## rotrou-bague-de-l-oubli.txt                       8.675853          10.997791
## rotrou-belle-alphrede.txt                         8.238488          11.563174
## rotrou-sosies.txt                                 8.589527          10.456716
## saint-roman-dialogue.txt                          8.869912          11.717547
##                                        literaryGenre
## archambault-etrennes.txt                           2
## chapuzeau-geneve-delivree.txt                      2
## cinq-auteurs-comedie-des-tuileries.txt             2
## colle-alfonse.txt                                  2
## corneillep-illusion-comique.txt                    2
## corneillep-melite.txt                              2
## corneillet-geolier-de-sois-meme.txt                2
## corneillet-illustres-ennemis.txt                   2
## cubieres-palmezeaux-lacrymanie.txt                 2
## labaume-messe-de-gnide.txt                         2
## moline-legislatrices.txt                           2
## rotrou-bague-de-l-oubli.txt                        2
## rotrou-belle-alphrede.txt                          2
## rotrou-sosies.txt                                  2
## saint-roman-dialogue.txt                           2

3.1.2 generic value of stopwords

We control that 3-grams do identify genres

distToTragedy <- DistToCentroid(d, centroid = rowMeans(d[, metadata[, "Genre"] == "tragedy"]))

distToComedy <- DistToCentroid(d, centroid = rowMeans(d[, metadata[, "Genre"] == "comedy"]))

m <- cbind(distToTragedy, distToComedy)
colnames(m) <- c("DistTragedyCentroid","DistComedyCentroid")

#create transparent color
mycol <- t_col("white", perc = 100, name = "transparent")

#fix colors
colors <- metadata[, "Genre"]
levels(colors) = c("transparent","blue", "transparent", "transparent", "transparent", "transparent", "transparent", "transparent", "transparent", "red", "transparent", "transparent")
#plot
plot(m, col=as.character(colors))
legend(18, 38, c("comedy", "tragedy"),col=c("blue", "red"),pch=1)
grid(nx = NULL, ny = NULL,
     lty = 1,      # Grid line type
     col = "gray", # Grid line color
     lwd = 1)      # Grid line width

# text(m[, 1], m[, 2], labels=row.names(m), cex= 0.1) # To add the name of the plays

#Save the image
png("./R/images/clusters_3grams.png", width = 2500, height = 2000, res=300) 
plot(m, col=as.character(colors))
legend(18, 38, c("comedy", "tragedy"),col=c("blue", "red"),pch=1)
grid(nx = NULL, ny = NULL, lty = 1, col = "gray", lwd = 1)
dev.off() 
## quartz_off_screen 
##                 2

Results are more precise than with stopwords.

We control tragedies classified with comedies:

literaryGenre <- metadata[, "Genre"]
check_anomalies<-as.data.frame(cbind(m,literaryGenre))
subset(check_anomalies, literaryGenre == '10' & DistTragedyCentroid >34)
##                                    DistTragedyCentroid DistComedyCentroid
## champrepus-ulysse.txt                         35.01097           36.45605
## delavigne-famille-temps-luther.txt            34.70682           35.44638
## deshoulieres-mort-de-cochon.txt               34.30966           31.55398
## puget-de-la-serre-pandoste-ii.txt             35.03667           35.12204
## soret-ceciliade.txt                           34.31754           34.05263
##                                    literaryGenre
## champrepus-ulysse.txt                         10
## delavigne-famille-temps-luther.txt            10
## deshoulieres-mort-de-cochon.txt               10
## puget-de-la-serre-pandoste-ii.txt             10
## soret-ceciliade.txt                           10

We control comedies classified with tragedies:

literaryGenre <- metadata[, "Genre"]
check_anomalies<-as.data.frame(cbind(m,literaryGenre))
subset(check_anomalies, literaryGenre == '2' & DistTragedyCentroid <29)
##                             DistTragedyCentroid DistComedyCentroid
## diderot-fils-naturel.txt               28.91923           26.40822
## doruxigne-alzate.txt                   28.82176           34.88925
## genlis-belle-et-la-bete.txt            27.18629           20.95635
##                             literaryGenre
## diderot-fils-naturel.txt                2
## doruxigne-alzate.txt                    2
## genlis-belle-et-la-bete.txt             2

3.2 Within inertia of the two clusters

#Get all possible genres
levels(metadata[, "Genre"])
##  [1] ""            "comedy"      "dialogue"    "drama"       "farce"      
##  [6] "monologue"   "opera"       "proverbe"    "saynete"     "tragedy"    
## [11] "tragicomedy" "vaudeville"
#Get genre of all plays
clusters <- metadata[, "Genre"]
levels(clusters) <- c(1:13)

#Compute inertia
clusterInertia(t(d), as.numeric(clusters))
##  [1] 198.08189 625.92647  23.60303  29.55105  10.32456  56.62311  11.61778
##  [8] 112.83419  21.57262 212.44350  61.53742   5.48289

3.3 Centroid distance

Comedy (with 3-grams)

comedies = d[, metadata[, "Genre"] == "comedy"]
comediesToCentroid = DistToCentroid(comedies, method="manhattan")

summary(comediesToCentroid)
##  DistToCentroid 
##  Min.   :20.96  
##  1st Qu.:30.10  
##  Median :31.50  
##  Mean   :31.35  
##  3rd Qu.:32.79  
##  Max.   :37.47
boxplot(comediesToCentroid)

# Most typical comedies
head(comediesToCentroid[order(comediesToCentroid[, 1]),])
##        genlis-belle-et-la-bete.txt     lesage-dorneval-ile-gougou.txt 
##                           20.95635                           21.58979 
##             liborliere-cloison.txt         dancourt-mari-retrouve.txt 
##                           25.91816                           26.15407 
##     palissot-barbier-de-bagdad.txt dancourt-impromptu-de-garnison.txt 
##                           26.17055                           26.28167
# Less typical comedies
tail(comediesToCentroid[order(comediesToCentroid[, 1]),])
##                 colle-alfonse.txt   corneillep-illusion-comique.txt 
##                          36.08137                          36.64387 
##     chapuzeau-geneve-delivree.txt quinault-comedie-sans-comedie.txt 
##                          36.64600                          36.71626 
##         rotrou-belle-alphrede.txt                colle-cocatrix.txt 
##                          36.87928                          37.47040

Tragedies (with 3-grams)

tragedies = d[, metadata[, "Genre"] == "tragedy"]
tragediesToCentroid = DistToCentroid(tragedies, method="manhattan")

summary(tragediesToCentroid)
##  DistToCentroid 
##  Min.   :18.76  
##  1st Qu.:24.87  
##  Median :26.27  
##  Mean   :26.82  
##  3rd Qu.:28.29  
##  Max.   :35.04
boxplot(tragediesToCentroid)

# Most typical comedies
head(tragediesToCentroid[order(tragediesToCentroid[, 1]),])
##  la-thuilerie-soliman.txt barbier-mort-de-cesar.txt      saurin-spartacus.txt 
##                  18.76257                  20.31262                  21.40985 
##       genest-zelonide.txt      pellegrin-tibere.txt     voltaire-mariamne.txt 
##                  21.86938                  22.19090                  22.40103
# Less typical comedies
tail(tragediesToCentroid[order(tragediesToCentroid[, 1]),])
##       puget-de-la-serre-thesee.txt    deshoulieres-mort-de-cochon.txt 
##                           33.84736                           34.30966 
##                soret-ceciliade.txt delavigne-famille-temps-luther.txt 
##                           34.31754                           34.70682 
##              champrepus-ulysse.txt  puget-de-la-serre-pandoste-ii.txt 
##                           35.01097                           35.03667

4 Generic evolution

4.1 Tragedy

4.1.1 With 3-grams

tragedies <- d[, metadata[, "Genre"] == 'tragedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500]
tragediesToCentroid = DistToCentroid(tragedies, method="manhattan")
evoCentroid<-cbind(tragediesToCentroid,metadata[metadata$Genre=='tragedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500, ])
vizEvoCentroid <- ggplot(data = evoCentroid, mapping = aes(x = Date, y = DistToCentroid))+
  geom_point(stat = "summary", fun = "mean")+ 
  geom_smooth(method = loess, size = 1)+ theme_bw()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
ggsave("./R/images/tragedy3.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

4.1.2 With words

tragedies <- corpus.tok.list.freq[, metadata[, "Genre"] == 'tragedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500]
tragediesToCentroid = DistToCentroid(tragedies, method="manhattan")
evoCentroid<-cbind(tragediesToCentroid,metadata[metadata$Genre=='tragedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500, ])
vizEvoCentroid <- ggplot(data = evoCentroid, mapping = aes(x = Date, y = DistToCentroid))+
  geom_point(stat = "summary", fun = "mean")+ 
  geom_smooth(method = loess, size = 1)+ theme_bw()
ggsave("./R/images/tragedyW.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

4.1.3 With stopwords

tragedies <- d_stop[, metadata[, "Genre"] == 'tragedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500]
tragediesToCentroid = DistToCentroid(tragedies, method="manhattan")
evoCentroid<-cbind(tragediesToCentroid,metadata[metadata$Genre=='tragedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500, ])
vizEvoCentroid <- ggplot(data = evoCentroid, mapping = aes(x = Date, y = DistToCentroid))+
  geom_point(stat = "summary", fun = "mean")+ 
  geom_smooth(method = loess, size = 1)+ theme_bw()
ggsave("./R/images/tragedy3.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

4.2 Comedy

4.2.1 With 3-grams

tragedies <- d[, metadata[, "Genre"] == 'comedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500]
tragediesToCentroid = DistToCentroid(tragedies, method="manhattan")
evoCentroid<-cbind(tragediesToCentroid,metadata[metadata$Genre=='comedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500, ])
vizEvoCentroid <- ggplot(data = evoCentroid, mapping = aes(x = Date, y = DistToCentroid))+
  geom_point(stat = "summary", fun = "mean")+ 
  geom_smooth(method = loess, size = 1)+ theme_bw()
ggsave("./R/images/comedy3.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

4.2.2 With words

tragedies <- corpus.tok.list.freq[, metadata[, "Genre"] == 'comedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500]
tragediesToCentroid = DistToCentroid(tragedies, method="manhattan")
evoCentroid<-cbind(tragediesToCentroid,metadata[metadata$Genre=='comedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500, ])
vizEvoCentroid <- ggplot(data = evoCentroid, mapping = aes(x = Date, y = DistToCentroid))+
  geom_point(stat = "summary", fun = "mean")+ 
  geom_smooth(method = loess, size = 1)+ theme_bw()
ggsave("./R/images/comedyW.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

4.2.3 With stopwords

tragedies <- d_stop[, metadata[, "Genre"] == 'comedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500]
tragediesToCentroid = DistToCentroid(tragedies, method="manhattan")
evoCentroid<-cbind(tragediesToCentroid,metadata[metadata$Genre=='comedy' & metadata[, "Date"] <1800 & metadata[, "Date"] >1500, ])
vizEvoCentroid <- ggplot(data = evoCentroid, mapping = aes(x = Date, y = DistToCentroid))+
  geom_point(stat = "summary", fun = "mean")+ 
  geom_smooth(method = loess, size = 1)+ theme_bw()
ggsave("./R/images/comedy3.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5 Distance to auctorial centroid vs generic centroid

5.1 Comedy

We select all the authors with more than 3 plays in the dataset

#all authors
n_occur <- data.frame(table(metadata$Author))
# Remove forst row (author empty value)
n_occur<-n_occur[-1,]

#more than 4 times
multiples <- n_occur[n_occur$Freq > 2,]
#transform df into vector
multiples <- multiples$Var1
#number of authors
length(multiples)
## [1] 113

We select all the authors who have more than 1 comedy

authorsSelected=c()
for (x in multiples){
  results <- as.data.frame(metadata[metadata$Genre == 'comedy' & metadata$Author == x,])
  if(nrow(results)>2){
  authorsSelected<-append(authorsSelected,x)
  }
}

We compute the distance to the centroid of the author, the centroid of the genre and the distance between both:

#create an empty df to store results
df_comedy=data.frame(matrix(ncol = 7, nrow = 0))

#get name of plays
plays<-rownames(metadata)
#prepare to iterate
incr<-0

#loop over plays
for (x in plays){
  #increment
  incr<-incr+1
  #get author name
  author <- metadata[incr,2]
  #get genre
  genre<-metadata[incr,4]
  #get date
  date<-metadata[incr,3]
  #if author has written multiple texts present in the corpus and genre is known
   if (author %in% authorsSelected==TRUE & genre =='comedy' & date>1500 & date<1800){
    #get the data of all the plays of an author
    authorData = d[, metadata[, "Author"] == author & metadata[, "Genre"] == genre]
#    authorData = d[, metadata[, "Author"] == author]
    #get all the plays of the same genre
#    genreData = d[, metadata[, "Genre"] == genre & metadata[, "Date"]]
    genreData = d[, metadata[, "Genre"] == genre & metadata[, "Date"] <date+30 & metadata[, "Date"] >date-30]
    #compute distance to centroid of the author
    authorToCentroid = as.data.frame(DistToCentroid(authorData, method="manhattan"))
    #compute distance to centroid of the genre
    genreToCentroid = as.data.frame(DistToCentroid(genreData, method="manhattan"))
    #compute the mean of the centroid to the genre
    meanGenre = mean(as.numeric(genreToCentroid$DistToCentroid))
    #get the distance of the play to the author
    playDistAuthor<-authorToCentroid[x,]
    #get the distance of the play to the genre
    playDistGenre<-genreToCentroid[x,]
    #diff author genre (rounded)
    distance<- round(euclidean(playDistGenre,playDistAuthor), digits=3)
    #distance<-round(playDistGenre-playDistAuthor, digits = 2)
    #save the result if value is not zero (problem occurred)
    control<-playDistAuthor!=''
    if (is.na(control)==FALSE){
      results<-c(x,author,genre,date,distance,playDistAuthor,playDistGenre,meanGenre)
      df_comedy<-rbind(df_comedy,results)
    }
  }
}
#give a name to the columns
x <- c("play","author", "genre","date","distance", "distance2author","distance2genre","meanGenre")
labelPoints<-rownames(df_comedy)
colnames(df_comedy) <- x

5.1.1 Distance to author

#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author))+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  theme_bw() +geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_comedy_labels.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_comedy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

5.1.1.1 Marivaux

df_comedy$color<-df_comedy$author
df_comedy$color<-as.numeric(df_comedy$color)
df_comedy$color[df_comedy$color == 240] <- "blue"
df_comedy$color[df_comedy$color != "blue"] <- "white"
truc<-df_comedy$color
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_comedy_marivaux.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.1.1.2 Voltaire

df_comedy$color<-df_comedy$author
df_comedy$color<-as.numeric(df_comedy$color)
df_comedy$color[df_comedy$color == 213] <- "blue"
df_comedy$color[df_comedy$color != "blue"] <- "white"
truc<-df_comedy$color
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_comedy_voltaire.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.1.1.3 Boissy

df_comedy$color<-df_comedy$author
df_comedy$color<-as.numeric(df_comedy$color)
df_comedy$color[df_comedy$color == 68] <- "blue"
df_comedy$color[df_comedy$color != "blue"] <- "white"
truc<-df_comedy$color
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_comedy_boissy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.1.1.4 Molière

df_comedy$color<-df_comedy$author
df_comedy$color<-as.numeric(df_comedy$color)
df_comedy$color[df_comedy$color == 262] <- "blue"
df_comedy$color[df_comedy$color != "blue"] <- "white"
truc<-df_comedy$color
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_comedy_moliere.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.1.2 Distance to genre

#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author))+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() + geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_comedy_labels.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() #+ geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_comedy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

5.1.2.1 Marivaux

df_comedy$color<-df_comedy$author
df_comedy$color<-as.numeric(df_comedy$color)
df_comedy$color[df_comedy$color == 240] <- "blue"
df_comedy$color[df_comedy$color != "blue"] <- "white"
truc<-df_comedy$color
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_comedy_marivaux.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.1.2.2 Voltaire

df_comedy$color<-df_comedy$author
df_comedy$color<-as.numeric(df_comedy$color)
df_comedy$color[df_comedy$color == 213] <- "blue"
df_comedy$color[df_comedy$color != "blue"] <- "white"
truc<-df_comedy$color
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_comedy_voltaire.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.1.2.3 Boissy

df_comedy$color<-df_comedy$author
df_comedy$color<-as.numeric(df_comedy$color)
df_comedy$color[df_comedy$color == 68] <- "blue"
df_comedy$color[df_comedy$color != "blue"] <- "white"
truc<-df_comedy$color
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_comedy_boissy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.1.2.4 Molière

df_comedy$color<-df_comedy$author
df_comedy$color<-as.numeric(df_comedy$color)
df_comedy$color[df_comedy$color == 262] <- "blue"
df_comedy$color[df_comedy$color != "blue"] <- "white"
truc<-df_comedy$color
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_comedy_moliere.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.1.3 Distance between the two centroids

#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance")+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/spread_comedy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Controlling variance

# with 4 plays
#x <- 1:71
x <- 1:nrow(df_comedy)
y<-roll_var(as.numeric(df_comedy$distance), width = 30, min_obs = 1)
lo <- loess(y~x)
plot(x,y,xlab="index", ylab="variance")
lines(predict(lo), col='red', lwd=2)

png("./R/images/variance2.png", width = 850, height = 600) 
plot(x,y,xlab="index", ylab="variance")
lines(predict(lo), col='red', lwd=2)
dev.off() 
## quartz_off_screen 
##                 2

5.2 Tragedy

We select all the authors with more than 3 plays in the dataset

#all authors
n_occur <- data.frame(table(metadata$Author))
# Remove forst row (author empty value)
n_occur<-n_occur[-1,]

#more than 4 times
multiples <- n_occur[n_occur$Freq > 2,]
#transform df into vector
multiples <- multiples$Var1
#number of authors
length(multiples)
## [1] 113

We select all the authors who have more than 1 tragedy

authorsSelected=c()
for (x in multiples){
  results <- as.data.frame(metadata[metadata$Genre == 'tragedy' & metadata$Author == x,])
  if(nrow(results)>2){
  authorsSelected<-append(authorsSelected,x)
  }
}

We compute the distance to the centroid of the author, the centroid of the genre and the distance between both:

#create an empty df to store results
df_tragedy=data.frame(matrix(ncol = 7, nrow = 0))

#get name of plays
plays<-rownames(metadata)
#prepare to iterate
incr<-0

#loop over plays
for (x in plays){
  #increment
  incr<-incr+1
  #get author name
  author <- metadata[incr,2]
  #get genre
  genre<-metadata[incr,4]
  #get date
  date<-metadata[incr,3]
  #if author has written multiple texts present in the corpus and genre is known
   if (author %in% authorsSelected==TRUE & genre =='tragedy' & date>1500 & date<1800){
    #get the data of all the plays of an author
    authorData = d[, metadata[, "Author"] == author & metadata[, "Genre"] == genre]
#    authorData = d[, metadata[, "Author"] == author]
    #get all the plays of the same genre
#    genreData = d[, metadata[, "Genre"] == genre & metadata[, "Date"]]
    genreData = d[, metadata[, "Genre"] == genre & metadata[, "Date"] <date+30 & metadata[, "Date"] >date-30]
    #compute distance to centroid of the author
    authorToCentroid = as.data.frame(DistToCentroid(authorData, method="manhattan"))
    #compute distance to centroid of the genre
    genreToCentroid = as.data.frame(DistToCentroid(genreData, method="manhattan"))
    #compute the mean of the centroid to the genre
    meanGenre = mean(as.numeric(genreToCentroid$DistToCentroid))
    #get the distance of the play to the author
    playDistAuthor<-authorToCentroid[x,]
    #get the distance of the play to the genre
    playDistGenre<-genreToCentroid[x,]
    #diff author genre (rounded)
    distance<- round(euclidean(playDistGenre,playDistAuthor), digits=3)
    #distance<-round(playDistGenre-playDistAuthor, digits = 2)
    #save the result if value is not zero (problem occurred)
    control<-playDistAuthor!=''
    if (is.na(control)==FALSE){
      results<-c(x,author,genre,date,distance,playDistAuthor,playDistGenre,meanGenre)
      df_tragedy<-rbind(df_tragedy,results)
    }
  }
}
#give a name to the columns
x <- c("play","author", "genre","date","distance", "distance2author","distance2genre","meanGenre")
labelPoints<-rownames(df_tragedy)
colnames(df_tragedy) <- x

5.2.1 Distance to author

#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author))+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  theme_bw() +geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_tragedy_labels.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend=FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_tragedy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

5.2.1.1 Voltaire

df_tragedy$color<-df_tragedy$author
df_tragedy$color<-as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 213] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc<-df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_tragedy_voltaire.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.2.1.2 Crébillon

df_tragedy$color<-df_tragedy$author
df_tragedy$color<-as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 181] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc<-df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_tragedy_crebillon.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.2.1.2 Racine

df_tragedy$color<-df_tragedy$author
df_tragedy$color<-as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 264] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc<-df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2author)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the author")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2author_tragedy_racine.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.2.2 Distance to genre

#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author))+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() +geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_labels.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend=FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

5.2.2.1 Voltaire

df_tragedy$color<-df_tragedy$author
df_tragedy$color<-as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 213] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc<-df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_voltaire.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.2.2.2 Crébillon

df_tragedy$color<-df_tragedy$author
df_tragedy$color<-as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 181] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc<-df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_crebillon.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.2.2.3 Racine

df_tragedy$color<-df_tragedy$author
df_tragedy$color<-as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 264] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc<-df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre)))+
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance to the genre")+
  scale_color_manual(values =c("blue"="blue","white"="transparent"))+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_racine.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'

5.2.3 Distance between the two centroids

#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance), label=author))+
  geom_point(stat = "summary", fun = "mean", aes(colour = author))+ 
  geom_smooth(method = loess, size = 1)+
  xlab("Date") + ylab("Distance")+
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/spread_tragedy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
vizEvoCentroid
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Controlling variance

# with 4 plays
#x <- 1:71
x <- 1:nrow(df_tragedy)
y<-roll_var(as.numeric(df_tragedy$distance), width = 30, min_obs = 1)
lo <- loess(y~x)
plot(x,y,xlab="index", ylab="variance")
lines(predict(lo), col='red', lwd=2)

png("./R/images/variance2.png", width = 850, height = 600) 
plot(x,y,xlab="index", ylab="variance")
lines(predict(lo), col='red', lwd=2)
dev.off() 
## quartz_off_screen 
##                 2